##############################################################################
#Replication script for:
#Gavras, K., Höhne, J.K., Blom, A.G., Schoen, H. (2022). Innovating the
#collection of open-ended answers: The linguistic and content characteristics
#of written and oral answers to political attitude questions
##############################################################################

library(dplyr)
library(tidyverse)
library(sjPlot)

#Load the replication dataset that every analysis in this script reads.
#The path is relative, so the .rds file must be in the working directory.
dataAnalysisRespondents <- readRDS("dataverse_replication_dataset.rds")


#####################################################################
#Table 1. Sample composition by request conditions (written and oral)
#####################################################################

#Number of respondents per request condition
#(TextCondition 1 = written, TextCondition 0 = oral).
table(dataAnalysisRespondents$TextCondition)

#One row per request condition: the median of birthyear and the mean of every
#other respondent characteristic. Missing values are dropped per variable.
#TRUE is spelled out because T is an ordinary variable that can be reassigned.
sampleComposition <- dataAnalysisRespondents %>%
  group_by(TextCondition) %>%
  summarise(
    birthyearMedian = median(birthyear, na.rm = TRUE), #birthyear 7 = 1970-1974
    femaleProp = mean(female, na.rm = TRUE),
    lowerEducationProp = mean(lowerEducation, na.rm = TRUE),
    #Due to rounding the decimal slightly differs.
    mediumEducationProp = mean(mediumEducation, na.rm = TRUE),
    highEducationProp = mean(highEducation, na.rm = TRUE),
    westGermanyProp = mean(westGermany, na.rm = TRUE),
    motherTongueGermanProp = mean(motherTongueGerman, na.rm = TRUE),
    #Due to rounding the decimal slightly differs.
    smartphoneSkillsMean = mean(smartphoneSkills, na.rm = TRUE),
    internetUsageMean = mean(internetUsage, na.rm = TRUE),
    voiceMessageProp = mean(voiceMessage, na.rm = TRUE),
    voteIntentionMean = mean(voteIntention, na.rm = TRUE),
    polKnowledgeProp = mean(polKnowledge, na.rm = TRUE),
    CDUCSUvoterProp = mean(CDUCSUvoter, na.rm = TRUE),
    SPDvoterProp = mean(SPDvoter, na.rm = TRUE),
    GreensvoterProp = mean(Greensvoter, na.rm = TRUE),
    AfDvoterProp = mean(AfDvoter, na.rm = TRUE)
  )

#Transposed so that the conditions are the columns and the statistics the rows.
t(sampleComposition)


############################################
#additional analyses: item-nonresponse rates
############################################

#Row proportions (margin = 1) of each non-response indicator within the two
#request conditions, rounded to two decimals. One table per question.
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$ProblemNonResponse),
                 margin = 1),
      digits = 2)
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$ChancellorNonResponse),
                 margin = 1),
      digits = 2)
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$CDUCSUNonResponse),
                 margin = 1),
      digits = 2)
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$SPDNonResponse),
                 margin = 1),
      digits = 2)
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$GreensNonResponse),
                 margin = 1),
      digits = 2)
round(prop.table(table(dataAnalysisRespondents$TextCondition,
                       dataAnalysisRespondents$AfDNonResponse),
                 margin = 1),
      digits = 2)


################################################################################
#Figure 1. Logistic regressions for comparing the written (written = 1) and oral
#conditions in terms of differential dropout
################################################################################

#Binomial GLM of the request condition (written = 1) on respondent
#characteristics. abs(1 - westGermany) reverses the West Germany indicator
#(presumably coded 0/1) so that the coefficient refers to East Germany.
#lowerEducation is not part of the formula.
m_written <- glm(
  TextCondition ~
    as.factor(abs(1-westGermany)) +
    as.factor(female) +
    birthyear +
    as.factor(mediumEducation) +
    as.factor(highEducation) +
    as.factor(motherTongueGerman) +
    smartphoneSkills +
    internetUsage +
    as.factor(voiceMessage) +
    voteIntention +
    as.factor(polKnowledge) +
    as.factor(CDUCSUvoter) +
    as.factor(SPDvoter) +
    as.factor(Greensvoter) +
    as.factor(AfDvoter),
  data = dataAnalysisRespondents,
  family = "binomial"
)

summary(m_written)

#Coefficient plot. The axis labels are listed in reverse order of the model
#terms (last term first). TRUE/FALSE are spelled out because T and F are
#ordinary variables that can be reassigned.
sjPlot::plot_models(
  m_written,
  transform = NULL,
  grid = TRUE,
  show.legend = FALSE,
  m.labels = c("Written vs. oral"),
  vline.color = "grey60",
  dot.size = 4,
  line.size = 1.5,
  colors = c("black", "black"),
  axis.labels = c(
    "AfD voter", "Greens voter", "SPD voter", "CDU/CSU voter",
    "Political knowledge", "Intention to vote", "Voice messaging",
    "Internet usage", "Smartphone skills", "Mother tongue German",
    "Education: high", "Education: medium", "Year of birth", "Female",
    "East Germany"
  )
) +
  labs(y = "Coefficients") +
  theme_sjplot(base_size = 28)


###########################################################
#Table 2. Average answer length of written and oral answers
###########################################################

#Respondents with a non-empty answer (length > 0), one subset per question.
#subset() keeps only rows where the condition is TRUE, so rows with a missing
#length are excluded as well.
answeredProblem <- subset(dataAnalysisRespondents, ProblemLength > 0)
answeredChancellor <- subset(dataAnalysisRespondents, ChancellorLength > 0)
answeredCDUCSU <- subset(dataAnalysisRespondents, CDUCSULength > 0)
answeredSPD <- subset(dataAnalysisRespondents, SPDLength > 0)
answeredGreens <- subset(dataAnalysisRespondents, GreensLength > 0)
answeredAfD <- subset(dataAnalysisRespondents, AfDLength > 0)

#F tests comparing the variance of answer length between the two conditions.
var.test(ProblemLength ~ TextCondition, data = answeredProblem)
var.test(ChancellorLength ~ TextCondition, data = answeredChancellor)
var.test(CDUCSULength ~ TextCondition, data = answeredCDUCSU)
var.test(SPDLength ~ TextCondition, data = answeredSPD)
var.test(GreensLength ~ TextCondition, data = answeredGreens)
var.test(AfDLength ~ TextCondition, data = answeredAfD)

#Two-sample t-tests on answer length, all with unequal variances.
#FALSE is spelled out because F is an ordinary variable that can be reassigned.
t.test(ProblemLength ~ TextCondition, data = answeredProblem, var.equal = FALSE)
t.test(ChancellorLength ~ TextCondition, data = answeredChancellor, var.equal = FALSE)
#Due to rounding the decimal slightly differs.
t.test(CDUCSULength ~ TextCondition, data = answeredCDUCSU, var.equal = FALSE)
t.test(SPDLength ~ TextCondition, data = answeredSPD, var.equal = FALSE)
t.test(GreensLength ~ TextCondition, data = answeredGreens, var.equal = FALSE)
t.test(AfDLength ~ TextCondition, data = answeredAfD, var.equal = FALSE)


################################################################################
#Table 3. Lexical structure (lexical richness, lexical diversity and readability) 
#of written and oral answers
################################################################################

#Respondents with a non-empty answer (length > 0), one subset per question.
#subset() keeps only rows where the condition is TRUE, so rows with a missing
#length are excluded as well. The same subsets are used for all three measures.
answeredProblem <- subset(dataAnalysisRespondents, ProblemLength > 0)
answeredChancellor <- subset(dataAnalysisRespondents, ChancellorLength > 0)
answeredCDUCSU <- subset(dataAnalysisRespondents, CDUCSULength > 0)
answeredSPD <- subset(dataAnalysisRespondents, SPDLength > 0)
answeredGreens <- subset(dataAnalysisRespondents, GreensLength > 0)
answeredAfD <- subset(dataAnalysisRespondents, AfDLength > 0)

#TRUE/FALSE are spelled out below because T and F are ordinary variables that
#can be reassigned.

#Yule's K
var.test(ProblemK ~ TextCondition, data = answeredProblem)
var.test(ChancellorK ~ TextCondition, data = answeredChancellor)
var.test(CDUCSUK ~ TextCondition, data = answeredCDUCSU)
var.test(SPDK ~ TextCondition, data = answeredSPD)
var.test(GreensK ~ TextCondition, data = answeredGreens)
var.test(AfDK ~ TextCondition, data = answeredAfD)

t.test(ProblemK ~ TextCondition, data = answeredProblem, var.equal = FALSE)
t.test(ChancellorK ~ TextCondition, data = answeredChancellor, var.equal = FALSE)
t.test(CDUCSUK ~ TextCondition, data = answeredCDUCSU, var.equal = FALSE)
t.test(SPDK ~ TextCondition, data = answeredSPD, var.equal = FALSE)
#NOTE(review): the only Yule's K test with var.equal = TRUE; presumably chosen
#from the var.test result above - confirm.
t.test(GreensK ~ TextCondition, data = answeredGreens, var.equal = TRUE)
t.test(AfDK ~ TextCondition, data = answeredAfD, var.equal = FALSE)

#Type-token ratio (TTR)
var.test(ProblemTTR ~ TextCondition, data = answeredProblem)
var.test(ChancellorTTR ~ TextCondition, data = answeredChancellor)
var.test(CDUCSUTTR ~ TextCondition, data = answeredCDUCSU)
var.test(SPDTTR ~ TextCondition, data = answeredSPD)
var.test(GreensTTR ~ TextCondition, data = answeredGreens)
var.test(AfDTTR ~ TextCondition, data = answeredAfD)

t.test(ProblemTTR ~ TextCondition, data = answeredProblem, var.equal = FALSE)
t.test(ChancellorTTR ~ TextCondition, data = answeredChancellor, var.equal = FALSE)
t.test(CDUCSUTTR ~ TextCondition, data = answeredCDUCSU, var.equal = FALSE)
t.test(SPDTTR ~ TextCondition, data = answeredSPD, var.equal = FALSE)
t.test(GreensTTR ~ TextCondition, data = answeredGreens, var.equal = FALSE)
t.test(AfDTTR ~ TextCondition, data = answeredAfD, var.equal = FALSE)

#Flesch reading ease (FRE)
var.test(ProblemFRE ~ TextCondition, data = answeredProblem)
var.test(ChancellorFRE ~ TextCondition, data = answeredChancellor)
var.test(CDUCSUFRE ~ TextCondition, data = answeredCDUCSU)
var.test(SPDFRE ~ TextCondition, data = answeredSPD)
var.test(GreensFRE ~ TextCondition, data = answeredGreens)
var.test(AfDFRE ~ TextCondition, data = answeredAfD)

#Test with equal instead of unequal variances.
t.test(ProblemFRE ~ TextCondition, data = answeredProblem, var.equal = TRUE)
t.test(ChancellorFRE ~ TextCondition, data = answeredChancellor, var.equal = FALSE)
t.test(CDUCSUFRE ~ TextCondition, data = answeredCDUCSU, var.equal = FALSE)
t.test(SPDFRE ~ TextCondition, data = answeredSPD, var.equal = FALSE)
#Due to rounding the decimal slightly differs.
t.test(GreensFRE ~ TextCondition, data = answeredGreens, var.equal = FALSE)
t.test(AfDFRE ~ TextCondition, data = answeredAfD, var.equal = FALSE)


#####################################################
#Table 4. Sentiment ratio of written and oral answers
#####################################################

#Respondents with a non-empty answer (length > 0), one subset per question.
#subset() keeps only rows where the condition is TRUE, so rows with a missing
#length are excluded as well.
answeredProblem <- subset(dataAnalysisRespondents, ProblemLength > 0)
answeredChancellor <- subset(dataAnalysisRespondents, ChancellorLength > 0)
answeredCDUCSU <- subset(dataAnalysisRespondents, CDUCSULength > 0)
answeredSPD <- subset(dataAnalysisRespondents, SPDLength > 0)
answeredGreens <- subset(dataAnalysisRespondents, GreensLength > 0)
answeredAfD <- subset(dataAnalysisRespondents, AfDLength > 0)

#F tests comparing the variance of the sentiment score between conditions.
var.test(ProblemSentiScore ~ TextCondition, data = answeredProblem)
var.test(ChancellorSentiScore ~ TextCondition, data = answeredChancellor)
var.test(CDUCSUSentiScore ~ TextCondition, data = answeredCDUCSU)
var.test(SPDSentiScore ~ TextCondition, data = answeredSPD)
var.test(GreensSentiScore ~ TextCondition, data = answeredGreens)
var.test(AfDSentiScore ~ TextCondition, data = answeredAfD)

#Two-sample t-tests on the sentiment score. TRUE/FALSE are spelled out because
#T and F are ordinary variables that can be reassigned.
#p-level of < 0.05 instead of < 0.001.
t.test(ProblemSentiScore ~ TextCondition, data = answeredProblem, var.equal = TRUE)
t.test(ChancellorSentiScore ~ TextCondition, data = answeredChancellor, var.equal = TRUE)
t.test(CDUCSUSentiScore ~ TextCondition, data = answeredCDUCSU, var.equal = TRUE)
t.test(SPDSentiScore ~ TextCondition, data = answeredSPD, var.equal = TRUE)
t.test(GreensSentiScore ~ TextCondition, data = answeredGreens, var.equal = FALSE)
t.test(AfDSentiScore ~ TextCondition, data = answeredAfD, var.equal = TRUE)


##############################################################
#Table 5. Average number of topics in written and oral answers
##############################################################

#Respondents with a non-empty answer (length > 0), one subset per question.
#subset() keeps only rows where the condition is TRUE, so rows with a missing
#length are excluded as well.
answeredProblem <- subset(dataAnalysisRespondents, ProblemLength > 0)
answeredChancellor <- subset(dataAnalysisRespondents, ChancellorLength > 0)
answeredCDUCSU <- subset(dataAnalysisRespondents, CDUCSULength > 0)
answeredSPD <- subset(dataAnalysisRespondents, SPDLength > 0)
answeredGreens <- subset(dataAnalysisRespondents, GreensLength > 0)
answeredAfD <- subset(dataAnalysisRespondents, AfDLength > 0)

#F tests comparing the variance of the topic number between conditions.
var.test(ProblemTopicNumber ~ TextCondition, data = answeredProblem)
var.test(ChancellorTopicNumber ~ TextCondition, data = answeredChancellor)
var.test(CDUCSUTopicNumber ~ TextCondition, data = answeredCDUCSU)
var.test(SPDTopicNumber ~ TextCondition, data = answeredSPD)
var.test(GreensTopicNumber ~ TextCondition, data = answeredGreens)
var.test(AfDTopicNumber ~ TextCondition, data = answeredAfD)

#Two-sample t-tests on the topic number. TRUE/FALSE are spelled out because
#T and F are ordinary variables that can be reassigned.
t.test(ProblemTopicNumber ~ TextCondition, data = answeredProblem, var.equal = TRUE)
t.test(ChancellorTopicNumber ~ TextCondition, data = answeredChancellor, var.equal = TRUE)
t.test(CDUCSUTopicNumber ~ TextCondition, data = answeredCDUCSU, var.equal = TRUE)
#Test with unequal instead of equal variances.
t.test(SPDTopicNumber ~ TextCondition, data = answeredSPD, var.equal = FALSE)
#Due to rounding the decimal slightly differs / Test with unequal instead of equal variances.
t.test(GreensTopicNumber ~ TextCondition, data = answeredGreens, var.equal = FALSE)
#Test with unequal instead of equal variances.
t.test(AfDTopicNumber ~ TextCondition, data = answeredAfD, var.equal = FALSE)


############################################
#Table 6. Topics of written and oral answers
############################################

#Due to the stochastic nature of topic models (Ballester/Penner 2022; 
#Journal of Informetrics) it is not possible to exactly replicate the results 
#presented in the table.


#######################################################################
#Table 7. Effective number of topics (ENTs) in written and oral answers
#######################################################################

#Due to the stochastic nature of topic models (Ballester/Penner 2022; 
#Journal of Informetrics) it is not possible to exactly replicate the results 
#presented in the table.


################################################################################
################################################################################
#Robustness Checks
################################################################################
################################################################################

######################################################################################
#Table A1: Sample composition between item response and non-response in the oral group
######################################################################################

#Oral condition only (TextCondition == 0): one row per value of
#ProblemNonResponse with the median of birthyear and the mean of every other
#respondent characteristic. Missing values are dropped per variable.
#This overwrites the sampleComposition object created for Table 1.
#TRUE is spelled out because T is an ordinary variable that can be reassigned.
sampleComposition <- dataAnalysisRespondents %>%
  filter(TextCondition == 0) %>%
  group_by(ProblemNonResponse) %>%
  summarise(
    birthyearMedian = median(birthyear, na.rm = TRUE),
    femaleProp = mean(female, na.rm = TRUE),
    lowerEducationProp = mean(lowerEducation, na.rm = TRUE),
    mediumEducationProp = mean(mediumEducation, na.rm = TRUE),
    highEducationProp = mean(highEducation, na.rm = TRUE),
    westGermanyProp = mean(westGermany, na.rm = TRUE),
    motherTongueGermanProp = mean(motherTongueGerman, na.rm = TRUE),
    smartphoneSkillsMean = mean(smartphoneSkills, na.rm = TRUE),
    internetUsageMean = mean(internetUsage, na.rm = TRUE),
    voiceMessageProp = mean(voiceMessage, na.rm = TRUE),
    voteIntentionMean = mean(voteIntention, na.rm = TRUE),
    polKnowledgeProp = mean(polKnowledge, na.rm = TRUE),
    CDUCSUvoterProp = mean(CDUCSUvoter, na.rm = TRUE),
    SPDvoterProp = mean(SPDvoter, na.rm = TRUE),
    GreensvoterProp = mean(Greensvoter, na.rm = TRUE),
    AfDvoterProp = mean(AfDvoter, na.rm = TRUE)
  )

#Transposed so that the response groups are the columns.
t(sampleComposition)

#Respondents in the oral condition (TextCondition == 0).
dataAnalysisRespondentsOral <- subset(dataAnalysisRespondents, TextCondition == 0)

#Chi-squared tests of ProblemNonResponse against each categorical
#characteristic. The argument expressions are kept as written because
#chisq.test() prints them as the data name.
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$female)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$lowerEducation)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$mediumEducation)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$highEducation)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$westGermany)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$motherTongueGerman)

#Numeric characteristics: F test for equal variances, then a t-test.
#TRUE/FALSE are spelled out because T and F are ordinary variables that can be
#reassigned.
var.test(smartphoneSkills ~ ProblemNonResponse, data = dataAnalysisRespondentsOral)
t.test(smartphoneSkills ~ ProblemNonResponse, data = dataAnalysisRespondentsOral, var.equal = TRUE)

var.test(internetUsage ~ ProblemNonResponse, data = dataAnalysisRespondentsOral)
t.test(internetUsage ~ ProblemNonResponse, data = dataAnalysisRespondentsOral, var.equal = TRUE)

chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$voiceMessage)

var.test(voteIntention ~ ProblemNonResponse, data = dataAnalysisRespondentsOral)
t.test(voteIntention ~ ProblemNonResponse, data = dataAnalysisRespondentsOral, var.equal = FALSE)

chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$polKnowledge)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$CDUCSUvoter)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$SPDvoter)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$Greensvoter)
chisq.test(dataAnalysisRespondentsOral$ProblemNonResponse, dataAnalysisRespondentsOral$AfDvoter)


########################################################################################
#Figure A1. Logistic regressions for comparing item non-response (item non-response = 1) 
#and item response in the oral group
########################################################################################

#Binomial GLM of item non-response (ProblemNonResponse) on respondent
#characteristics, fitted on the oral condition only (TextCondition == 0).
#abs(1 - westGermany) reverses the West Germany indicator (presumably coded
#0/1) so that the coefficient refers to East Germany.
m_voiceanswer <- glm(
  ProblemNonResponse ~
    as.factor(abs(1-westGermany)) +
    as.factor(female) +
    birthyear +
    as.factor(mediumEducation) +
    as.factor(highEducation) +
    as.factor(motherTongueGerman) +
    smartphoneSkills +
    internetUsage +
    as.factor(voiceMessage) +
    voteIntention +
    as.factor(polKnowledge) +
    as.factor(CDUCSUvoter) +
    as.factor(SPDvoter) +
    as.factor(Greensvoter) +
    as.factor(AfDvoter),
  data = dataAnalysisRespondents[which(dataAnalysisRespondents$TextCondition == 0), ],
  family = "binomial"
)

summary(m_voiceanswer)

#Coefficient plot. The axis labels are listed in reverse order of the model
#terms (last term first). TRUE/FALSE are spelled out because T and F are
#ordinary variables that can be reassigned.
sjPlot::plot_models(
  m_voiceanswer,
  transform = NULL,
  grid = TRUE,
  show.legend = FALSE,
  m.labels = c("Item non-response"),
  vline.color = "grey60",
  dot.size = 4,
  line.size = 1.5,
  colors = c("black", "black"),
  axis.labels = c(
    "AfD voter", "Greens voter", "SPD voter", "CDU/CSU voter",
    "Political knowledge", "Intention to vote", "Voice messaging",
    "Internet usage", "Smartphone skills", "Mother tongue German",
    "Education: high", "Education: medium", "Year of birth", "Female",
    "East Germany"
  )
) +
  labs(y = "Coefficients") +
  theme_sjplot(base_size = 28)


#############################################################################
#Multi-level regression with questions nested in respondents on answer length
#############################################################################

#Long format: one row per respondent and question. The column range
#ProblemLength:AfDLength depends on the column order of the dataset.
dataAnswerLength <- pivot_longer(
  dataAnalysisRespondents,
  cols = "ProblemLength":"AfDLength",
  names_to = "question",
  values_to = "answerLength"
)

#NOTE(review): lm() fits a pooled OLS model with the question as a factor.
#It has no respondent-level term although the section is labelled
#multi-level - confirm this is intended. No length > 0 filter is applied here.

#Main effects of request condition and question.
resultsRobustAnswerLength <- lm(
  answerLength ~ as.factor(TextCondition) + as.factor(question),
  data = dataAnswerLength
)
summary(resultsRobustAnswerLength)

#Same model with the condition-by-question interaction.
resultsRobustAnswerLengthInteract <- lm(
  answerLength ~ as.factor(TextCondition) * as.factor(question),
  data = dataAnswerLength
)
summary(resultsRobustAnswerLengthInteract)


#################################################################################
#Multi-level regression with questions nested in respondents on lexical structure
#################################################################################

#Yule's K
#Long format: one row per respondent and question. The column range
#ProblemK:AfDK depends on the column order of the dataset.
dataK <- dataAnalysisRespondents %>%
  pivot_longer("ProblemK":"AfDK", names_to = "question", values_to = "K")

#Main effects of request condition and question.
resultsRobustK <- lm(K ~ as.factor(TextCondition) + as.factor(question), data = dataK)
#Summarise the Yule's K model fitted on the line above. The earlier
#summary(resultsRobustAnswerLength) call printed the answer-length model again.
summary(resultsRobustK)

#Same model with the condition-by-question interaction.
resultsRobustKInteract <- lm(K ~ as.factor(TextCondition) * as.factor(question), data = dataK)
summary(resultsRobustKInteract)

#TTR
#Long format: one row per respondent and question.
dataTTR <- pivot_longer(
  dataAnalysisRespondents,
  cols = "ProblemTTR":"AfDTTR",
  names_to = "question",
  values_to = "TTR"
)

#Main effects of request condition and question.
resultsRobustTTR <- lm(
  TTR ~ as.factor(TextCondition) + as.factor(question),
  data = dataTTR
)
summary(resultsRobustTTR)

#Same model with the condition-by-question interaction.
resultsRobustTTRInteract <- lm(
  TTR ~ as.factor(TextCondition) * as.factor(question),
  data = dataTTR
)
summary(resultsRobustTTRInteract)

#FRE
#Long format: one row per respondent and question.
dataFRE <- pivot_longer(
  dataAnalysisRespondents,
  cols = "ProblemFRE":"AfDFRE",
  names_to = "question",
  values_to = "FRE"
)

#Main effects of request condition and question.
resultsRobustFRE <- lm(
  FRE ~ as.factor(TextCondition) + as.factor(question),
  data = dataFRE
)
summary(resultsRobustFRE)

#Same model with the condition-by-question interaction.
resultsRobustFREInteract <- lm(
  FRE ~ as.factor(TextCondition) * as.factor(question),
  data = dataFRE
)
summary(resultsRobustFREInteract)


################################################################################
#Multi-level regression with questions nested in respondents on sentiment scores
################################################################################

#Long format: one row per respondent and question. The column range
#ProblemSentiScore:AfDSentiScore depends on the column order of the dataset.
dataSentiScore <- pivot_longer(
  dataAnalysisRespondents,
  cols = "ProblemSentiScore":"AfDSentiScore",
  names_to = "question",
  values_to = "sentiScore"
)

#Main effects of request condition and question.
resultsRobustSentiScore <- lm(
  sentiScore ~ as.factor(TextCondition) + as.factor(question),
  data = dataSentiScore
)
summary(resultsRobustSentiScore)

#Same model with the condition-by-question interaction.
resultsRobustSentiScoreInteract <- lm(
  sentiScore ~ as.factor(TextCondition) * as.factor(question),
  data = dataSentiScore
)
summary(resultsRobustSentiScoreInteract)


###################################################################
#Table D1. Correlation matrix of sentiment ratios - written answers
###################################################################

#Written condition (TextCondition == 1): keep respondents who have at least
#one non-missing sentiment score, i.e. drop those for whom all six are NA.
#Their remaining missing scores are then set to 0.
sentimentWritten <- dataAnalysisRespondents %>%
  dplyr::filter(
    TextCondition == 1,
    !(is.na(ProblemSentiScore) & is.na(ChancellorSentiScore) &
        is.na(CDUCSUSentiScore) & is.na(SPDSentiScore) &
        is.na(GreensSentiScore) & is.na(AfDSentiScore))
  ) %>%
  dplyr::select("ProblemSentiScore":"AfDSentiScore") %>%
  replace_na(list(
    ProblemSentiScore = 0,
    ChancellorSentiScore = 0,
    CDUCSUSentiScore = 0,
    SPDSentiScore = 0,
    GreensSentiScore = 0,
    AfDSentiScore = 0
  ))

#Pearson correlation matrix of the six sentiment scores.
correlationText <- Hmisc::rcorr(as.matrix(sentimentWritten), type = "pearson")
correlationText #N = 1379 instead of N = 1380.


################################################################
#Table D1. Correlation matrix of sentiment ratios - oral answers
################################################################

#Oral condition (TextCondition == 0): keep respondents who have at least one
#non-missing sentiment score, i.e. drop those for whom all six are NA.
#Their remaining missing scores are then set to 0.
sentimentOral <- dataAnalysisRespondents %>%
  dplyr::filter(
    TextCondition == 0,
    !(is.na(ProblemSentiScore) & is.na(ChancellorSentiScore) &
        is.na(CDUCSUSentiScore) & is.na(SPDSentiScore) &
        is.na(GreensSentiScore) & is.na(AfDSentiScore))
  ) %>%
  dplyr::select("ProblemSentiScore":"AfDSentiScore") %>%
  replace_na(list(
    ProblemSentiScore = 0,
    ChancellorSentiScore = 0,
    CDUCSUSentiScore = 0,
    SPDSentiScore = 0,
    GreensSentiScore = 0,
    AfDSentiScore = 0
  ))

#Pearson correlation matrix of the six sentiment scores.
correlationVoice <- Hmisc::rcorr(as.matrix(sentimentOral), type = "pearson")
correlationVoice


#######################################
#topic number with different thresholds
#######################################

#Respondents with a non-empty answer (length > 0), one subset per question.
#subset() keeps only rows where the condition is TRUE, so rows with a missing
#length are excluded as well.
answeredProblem <- subset(dataAnalysisRespondents, ProblemLength > 0)
answeredChancellor <- subset(dataAnalysisRespondents, ChancellorLength > 0)
answeredCDUCSU <- subset(dataAnalysisRespondents, CDUCSULength > 0)
answeredSPD <- subset(dataAnalysisRespondents, SPDLength > 0)
answeredGreens <- subset(dataAnalysisRespondents, GreensLength > 0)
answeredAfD <- subset(dataAnalysisRespondents, AfDLength > 0)

#All t-tests below use the t.test() default for var.equal.

#words appearing in at least 5 answers
t.test(ProblemTopicNumberA5 ~ TextCondition, data = answeredProblem)
t.test(ChancellorTopicNumberA5 ~ TextCondition, data = answeredChancellor)
t.test(CDUCSUTopicNumberA5 ~ TextCondition, data = answeredCDUCSU)
t.test(SPDTopicNumberA5 ~ TextCondition, data = answeredSPD)
t.test(GreensTopicNumberA5 ~ TextCondition, data = answeredGreens)
t.test(AfDTopicNumberA5 ~ TextCondition, data = answeredAfD)

#words appearing in at least 20 answers
t.test(ProblemTopicNumberA20 ~ TextCondition, data = answeredProblem)
t.test(ChancellorTopicNumberA20 ~ TextCondition, data = answeredChancellor)
t.test(CDUCSUTopicNumberA20 ~ TextCondition, data = answeredCDUCSU)
t.test(SPDTopicNumberA20 ~ TextCondition, data = answeredSPD)
t.test(GreensTopicNumberA20 ~ TextCondition, data = answeredGreens)
t.test(AfDTopicNumberA20 ~ TextCondition, data = answeredAfD)

#5% threshold for topic assignment
t.test(ProblemTopicNumberTA5 ~ TextCondition, data = answeredProblem)
t.test(ChancellorTopicNumberTA5 ~ TextCondition, data = answeredChancellor)
t.test(CDUCSUTopicNumberTA5 ~ TextCondition, data = answeredCDUCSU)
t.test(SPDTopicNumberTA5 ~ TextCondition, data = answeredSPD)
t.test(GreensTopicNumberTA5 ~ TextCondition, data = answeredGreens)
t.test(AfDTopicNumberTA5 ~ TextCondition, data = answeredAfD)

#20% threshold for topic assignment
t.test(ProblemTopicNumberTA20 ~ TextCondition, data = answeredProblem)
t.test(ChancellorTopicNumberTA20 ~ TextCondition, data = answeredChancellor)
t.test(CDUCSUTopicNumberTA20 ~ TextCondition, data = answeredCDUCSU)
t.test(SPDTopicNumberTA20 ~ TextCondition, data = answeredSPD)
t.test(GreensTopicNumberTA20 ~ TextCondition, data = answeredGreens)
t.test(AfDTopicNumberTA20 ~ TextCondition, data = answeredAfD)
